# Replication Script for "US Governors Populism Database: 
# Assessing the Impact of Donald Trump on State-Level Discourse"

#------------------------------------------------------------------------------
# 0. Setup Working Directory
#------------------------------------------------------------------------------
# Set your working directory here
# Example: setwd("C:/Users/username/Documents/governors_analysis")
# or use setwd("~/Documents/governors_analysis") for Mac/Linux
# setwd("YOUR_PATH_HERE")

#------------------------------------------------------------------------------
# 1. Package Management
#------------------------------------------------------------------------------
# Packages this script attaches, in load order
required_packages <- c(
  "tidyverse",  # for data manipulation and ggplot2
  "psych",      # for descriptive statistics
  "irr"         # for reliability statistics
)

# Install any package whose namespace cannot be loaded. requireNamespace() is
# used rather than scanning installed.packages(), which R's documentation
# advises against for checking whether a particular package is available.
is_available <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
new_packages <- required_packages[!is_available]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach all required packages (psych is attached after tidyverse)
invisible(lapply(required_packages, library, character.only = TRUE))

#------------------------------------------------------------------------------
# 2. Data Import and Preparation
#------------------------------------------------------------------------------

# Read the replication dataset from the working directory
df <- read_csv("replication_data.csv")

# State labels treated as early Trump endorsers
early_endorsers <- c(
  "Maine1", "Maine2", "New Jersey1", "New Jersey2",
  "Florida1", "Florida2", "Arizona1", "South Carolina2"
)

# Derive two columns from the state label:
#   period          - "Pre-Trump" when the label ends in "1", otherwise
#                     "Post-Trump" (factor, Pre-Trump first)
#   endorser_status - whether the label is listed in early_endorsers
df <- df %>%
  mutate(
    period = ifelse(grepl("1$", state), "Pre-Trump", "Post-Trump"),
    period = factor(period, levels = c("Pre-Trump", "Post-Trump")),
    endorser_status = ifelse(
      state %in% early_endorsers,
      "Early Trump Endorser",
      "Other"
    )
  )

#------------------------------------------------------------------------------
# 3. Analysis Functions
#------------------------------------------------------------------------------

# Run a two-sample t-test (t.test() with its default settings) and print a
# short summary of the result.
#
# Args:
#   group1, group2: numeric vectors holding the scores of the two groups.
#   group1_name, group2_name: labels used in the printed header line.
#
# Returns:
#   The object returned by t.test(group1, group2).
#
# The printed mean difference is group1 minus group2, i.e. the same direction
# as the t-statistic and the confidence interval reported by t.test().
conduct_ttest <- function(group1, group2, group1_name, group2_name) {
  test_result <- t.test(group1, group2)

  # estimate holds the two group means in the order (group1, group2)
  group_means <- unname(test_result$estimate)
  mean_difference <- group_means[1] - group_means[2]

  cat(sprintf("\nComparison between %s and %s:\n", group1_name, group2_name))
  cat("Mean difference:", round(mean_difference, 3), "\n")
  cat("t-statistic:", round(test_result$statistic, 3), "\n")
  cat("p-value:", round(test_result$p.value, 3), "\n")
  cat("95% CI:", paste(round(test_result$conf.int, 3), collapse = ", "), "\n")

  return(test_result)
}

# Descriptive statistics for the `rounded_score` column.
#
# Args:
#   data: data frame containing a `rounded_score` column.
#   group_var: NULL for overall statistics, or the name (string) of a column
#     to group by.
#
# Returns:
#   When group_var is NULL, a one-row data.frame with columns group, n, mean,
#   sd, median, min and max, taken from psych::describe(). Otherwise one row
#   per level of group_var with the same statistics (no `group` column; the
#   grouping column keeps its own name).
#
# In both branches n is the number of non-missing scores, so it matches the
# observations the other statistics are computed from (they use na.rm = TRUE).
calculate_descriptives <- function(data, group_var = NULL) {
  if (is.null(group_var)) {
    stats <- psych::describe(data$rounded_score)
    return(data.frame(
      group = "Overall",
      n = stats$n,
      mean = round(stats$mean, 2),
      sd = round(stats$sd, 2),
      median = round(stats$median, 2),
      min = round(stats$min, 2),
      max = round(stats$max, 2)
    ))
  }

  data %>%
    group_by(!!sym(group_var)) %>%
    summarise(
      # count only rows that contribute to the statistics below
      n = sum(!is.na(rounded_score)),
      mean = round(mean(rounded_score, na.rm = TRUE), 2),
      sd = round(sd(rounded_score, na.rm = TRUE), 2),
      median = round(median(rounded_score, na.rm = TRUE), 2),
      min = round(min(rounded_score, na.rm = TRUE), 2),
      max = round(max(rounded_score, na.rm = TRUE), 2)
    )
}

#------------------------------------------------------------------------------
# 4. Main Analyses
#------------------------------------------------------------------------------

# 4.1 Pre-Trump vs Post-Trump Analysis
# The two samples are selected with the `period` factor created in Section 2,
# so this test uses exactly the same split as the plot and the descriptive
# table (an unanchored match on "1"/"2" could classify rows differently).
pre_post_ttest <- conduct_ttest(
  df$rounded_score[df$period == "Pre-Trump"],
  df$rounded_score[df$period == "Post-Trump"],
  "Pre-Trump", "Post-Trump"
)

# 4.2 Party Differences Analysis
party_ttest <- conduct_ttest(
  df$rounded_score[df$party == "R"],
  df$rounded_score[df$party == "D"],
  "Republicans", "Democrats"
)

# 4.3 Early Endorsers Analysis (Republicans only)
endorser_ttest <- conduct_ttest(
  df$rounded_score[df$early_endorsment == "Yes" & df$party == "R"],
  df$rounded_score[df$early_endorsment == "No" & df$party == "R"],
  "Early Endorsers", "Non-Early Endorsers"
)

# 4.4 Reliability Analysis
# Both functions are namespace-qualified: ggplot2 (attached via tidyverse)
# also exports an alpha() function, so the bare name would only reach psych
# because of the order in which the packages happen to be attached.
reliability_stats <- list(
  # Calculate Cronbach's alpha for agreement between coders
  cronbach = psych::alpha(cbind(df$coder1_score, df$coder2_score)),

  # Calculate Krippendorff's alpha for agreement between coders
  # (kripp.alpha receives the transposed matrix: one row per coder)
  kripp = irr::kripp.alpha(
    t(as.matrix(df[, c("coder1_score", "coder2_score")])),
    method = "interval"
  )
)

#------------------------------------------------------------------------------
# 5. Data Preparation for Visualization
#------------------------------------------------------------------------------

# Summarise `rounded_score` within each value of an existing `group` column.
# Returns one row per group with the columns group, mean and se. The standard
# error divides by the number of non-missing scores, matching the na.rm = TRUE
# used for the mean and the standard deviation.
summarise_group_scores <- function(scored) {
  scored %>%
    group_by(group) %>%
    summarise(
      mean = mean(rounded_score, na.rm = TRUE),
      se = sd(rounded_score, na.rm = TRUE) / sqrt(sum(!is.na(rounded_score))),
      .groups = "drop"
    )
}

# Display names for the two party codes compared in the analysis
party_labels <- c(R = "Republicans", D = "Democrats")

# Rows with one of those party codes, plus the display name. Rows with any
# other (or missing) party code are left out of the party groups instead of
# being labelled as Democrats.
party_df <- df %>%
  filter(party %in% names(party_labels)) %>%
  mutate(party_label = unname(party_labels[as.character(party)]))

# Calculate statistics for different groups
group_stats <- bind_rows(
  # Pre/Post Trump (all)
  df %>%
    mutate(group = paste0(period, " (all)")) %>%
    summarise_group_scores(),

  # Party overall
  party_df %>%
    mutate(group = paste0(party_label, " (all)")) %>%
    summarise_group_scores(),

  # Party by period combinations
  party_df %>%
    mutate(group = paste0(period, " ", party_label)) %>%
    summarise_group_scores(),

  # Early endorsers vs non-early endorsers (Republicans only); rows without a
  # "Yes"/"No" endorsement value are excluded
  df %>%
    filter(party == "R", early_endorsment %in% c("Yes", "No")) %>%
    mutate(group = if_else(early_endorsment == "Yes",
                           "Early endorsers Republicans",
                           "Non-early endorsers Republicans")) %>%
    summarise_group_scores()
)

# Add reference values (Trump and GPD); these are single values, so se is 0
data <- bind_rows(
  data.frame(
    group = c("Trump", "GPD (v 2.0) average"),
    mean = c(0.8, 0.4),
    se = c(0, 0)
  ),
  group_stats
)

# Set the group order (reversed for bottom-to-top plotting)
data$group <- factor(data$group,
                     levels = rev(c(
                       "Trump",
                       "GPD (v 2.0) average",
                       "Pre-Trump (all)",
                       "Post-Trump (all)",
                       "Republicans (all)",
                       "Democrats (all)",
                       "Pre-Trump Republicans",
                       "Post-Trump Democrats",
                       "Post-Trump Republicans",
                       "Pre-Trump Democrats",
                       "Early endorsers Republicans",
                       "Non-early endorsers Republicans"
                     )))

#------------------------------------------------------------------------------
# 6. Create and Save Plot
#------------------------------------------------------------------------------

# Create the plot: one point per group at its mean score, with a horizontal
# bar spanning mean +/- 1.96 * se, and two vertical reference lines.
main_plot <- ggplot(data, aes(x = mean, y = group)) +
  # Add vertical reference lines
  geom_vline(xintercept = 0.4, 
             linetype = "dotted", 
             color = "gray40", 
             size = 0.5) +
  geom_vline(xintercept = 0.8, 
             linetype = "dashed", 
             color = "gray40", 
             size = 0.5) +
  # Add points and error bars
  geom_point(size = 3, color = "black") +
  geom_errorbarh(aes(xmin = mean - 1.96 * se, 
                     xmax = mean + 1.96 * se),
                 height = 0.2, 
                 size = 0.5) +
  # Add value labels
  geom_text(aes(label = sprintf("%.2f", mean)),
            hjust = -0.5,
            vjust = 1.5,
            size = 3,
            fontface = "bold") +
  # Add reference line labels
  annotate("text", 
           x = 0.4, 
           y = -0.5, 
           label = "GPD (v 2.0) average (0.4)",
           angle = 90,
           hjust = 1,
           size = 3,
           color = "gray40") +
  annotate("text",
           x = 0.8,
           y = -0.5,
           label = "Trump's populism score (0.8)",
           angle = 90,
           hjust = 1,
           size = 3,
           color = "gray40") +
  # Set labels and scales
  labs(x = "Mean Populism Score",
       y = "",
       caption = "Error bars represent 95% confidence intervals. Trump's and GPD scores are single data points.") +
  scale_x_continuous(breaks = seq(0, 1, 0.1),
                     labels = scales::number_format(accuracy = 0.1)) +
  # Restrict the visible range to [0, 1] on the coordinate system instead of
  # through scale limits: scale limits turn out-of-range values into missing
  # values, which would drop an error bar whose end falls below 0 or above 1.
  coord_cartesian(xlim = c(0, 1)) +
  # Set theme
  theme_minimal(base_size = 12) +
  theme(
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10, color = "black"),
    axis.text.y = element_text(hjust = 1),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, size = 1),
    plot.caption = element_text(hjust = 0.5, size = 8, face = "italic"),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10, unit = "pt")
  )

#------------------------------------------------------------------------------
# 6. Save Results
#------------------------------------------------------------------------------

# Make sure both output folders are present; the warning dir.create() gives
# for an already existing folder is suppressed
invisible(lapply(c("figures", "data"), dir.create, showWarnings = FALSE))

# Write the figure to disk as an LZW-compressed TIFF (11 x 5.5 in, 1200 dpi)
ggsave(
  filename = "figures/populism_scores_plot.tiff",
  plot = main_plot,
  width = 11,
  height = 5.5,
  units = "in",
  dpi = 1200,
  compression = "lzw",
  bg = "white"
)

#------------------------------------------------------------------------------
# 7. Generate Descriptive Statistics Table
#------------------------------------------------------------------------------

# Summary statistics of `rounded_score` for the rows matching a condition.
#
# Args:
#   data: data frame containing a `rounded_score` column.
#   condition: filter condition written as a string of R code; it is parsed
#     and evaluated against the columns of `data`
#     (e.g. "period == 'Pre-Trump' & party == 'R'").
#
# Returns:
#   A one-row data frame with the columns mean, median, sd and se, each
#   rounded to two decimals. Missing scores are ignored throughout; the
#   standard error therefore divides by the number of non-missing scores
#   rather than by the number of rows.
calculate_group_stats <- function(data, condition) {
  data %>%
    filter(!!rlang::parse_expr(condition)) %>%
    summarise(
      mean = round(mean(rounded_score, na.rm = TRUE), 2),
      median = round(median(rounded_score, na.rm = TRUE), 2),
      sd = round(sd(rounded_score, na.rm = TRUE), 2),
      se = round(
        sd(rounded_score, na.rm = TRUE) / sqrt(sum(!is.na(rounded_score))),
        2
      )
    )
}

# Row label -> filter condition for every group in the descriptive table,
# listed in the order the rows should appear
group_conditions <- c(
  "Pre-Trump (all)" = "period == 'Pre-Trump'",
  "Post-Trump (all)" = "period == 'Post-Trump'",
  "Republicans (all)" = "party == 'R'",
  "Democrats (all)" = "party == 'D'",
  "Pre-Trump Republicans" = "period == 'Pre-Trump' & party == 'R'",
  "Pre-Trump Democrats" = "period == 'Pre-Trump' & party == 'D'",
  "Post-Trump Republicans" = "period == 'Post-Trump' & party == 'R'",
  "Post-Trump Democrats" = "period == 'Post-Trump' & party == 'D'",
  "Early endorsers Republicans" = "party == 'R' & early_endorsment == 'Yes'",
  "Non-early endorsers Republicans" = "party == 'R' & early_endorsment == 'No'"
)

# Build the table: one summary row per entry above, tagged with its label
descriptive_stats <- names(group_conditions) %>%
  lapply(function(group_label) {
    calculate_group_stats(df, group_conditions[[group_label]]) %>%
      mutate(Group = group_label)
  }) %>%
  bind_rows() %>%
  select(Group, mean, median, sd, se)

# Print table in LaTeX format
cat("\\begin{tabular}{lrrrr}\n")
cat("  \\hline\n")
cat("  Group & Mean & Median & SD & SE \\\\ \n")
cat("  \\hline\n")
# sprintf() is vectorised over the columns, giving one formatted line per
# table row; unlike a 1:nrow() loop it prints nothing for an empty table.
cat(
  sprintf("  %s & %.2f & %.2f & %.2f & %.2f \\\\ \n",
          descriptive_stats$Group,
          descriptive_stats$mean,
          descriptive_stats$median,
          descriptive_stats$sd,
          descriptive_stats$se),
  sep = ""
)
cat("  \\hline\n")
cat("\\end{tabular}\n")

# Print session info for replication
sessionInfo()